**********************************************************************
*Sample cleaning from CPS 
**********************************************************************
* Start from a clean session: empty memory and discard any stored snapshots.
clear all
snapshot erase _all

cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/data/cps2023"

*************************
*BRING IN AND EDIT CPS
*************************
use "cps_2023.dta", clear

* Year of job loss = survey YEAR minus DWLASTWRK. Defined only for displaced
* workers (DWSTAT == 1) whose DWLASTWRK is at most 5; missing for everyone else.
generate LOSSYEAR = YEAR - DWLASTWRK if DWSTAT == 1 & DWLASTWRK <= 5


*************************
*MERGE VARIABLES ON TO CPS
*************************
* Attach state GDP (per capita and total) for the year of job loss.
* Every CPS record is kept whether or not it finds a match; records that exist
* only in the GDP file are discarded.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis/data/bea"
sort STATEFIP LOSSYEAR
merge m:1 STATEFIP LOSSYEAR ///
    using "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis\data\bea\gdp per capita.dta", ///
    keepusing(GDPPERCAPITA_LOSSYEAR GDP_LOSSYEAR) keep(master match) nogenerate

* Park total GDP under a temporary name so that a later merge can bring in
* another variable called GDP_LOSSYEAR without a name clash.
rename GDP_LOSSYEAR tGDP_LOSSYEAR

* Second GDP source (19632018.dta): used only to fill gaps left by the first
* GDP merge. CPS records are all kept; using-only records are discarded.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis/data/bea"
sort STATEFIP LOSSYEAR
merge m:1 STATEFIP LOSSYEAR using 19632018.dta, ///
    keepusing(GDP_LOSSYEAR) keep(master match) nogenerate

* Backfill where total GDP is still missing. missing() is the idiomatic test and
* also catches extended missing codes (.a-.z), which an equality test against
* system missing alone would skip.
replace tGDP_LOSSYEAR = GDP_LOSSYEAR if missing(tGDP_LOSSYEAR)
drop GDP_LOSSYEAR


* Attach the state unemployment rate and EMPPOP for the year of job loss.
* Unmatched CPS records stay in the data; using-only records are discarded.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis/data/bls"
sort STATEFIP LOSSYEAR
merge m:1 STATEFIP LOSSYEAR using stateemp_mj.dta, ///
    keepusing(UNEMPRATE_LOSSYEAR EMPPOP) keep(master match) nogenerate



*************************
*CLEAN WORKER VARIABLES
*************************

*Education var
* Map EDUC99 attainment codes to a years-of-schooling scale.
* Any EDUC99 value not listed below (including missing) leaves EDUC_CLEAN missing.
gen EDUC_CLEAN = .
replace EDUC_CLEAN = 8  if inrange(EDUC99, 1, 5)
replace EDUC_CLEAN = 9  if EDUC99 == 6
replace EDUC_CLEAN = 10 if EDUC99 == 7
replace EDUC_CLEAN = 11 if EDUC99 == 8
replace EDUC_CLEAN = 12 if inlist(EDUC99, 9, 10)
replace EDUC_CLEAN = 13 if EDUC99 == 11
* Code 12 was previously unmapped, so those records silently ended up with
* missing education. It is grouped with 13 and 14 here.
* NOTE(review): assumes EDUC99 == 12 is an associate-degree category like 13/14;
* confirm against the IPUMS codebook.
replace EDUC_CLEAN = 14 if inlist(EDUC99, 12, 13, 14)
replace EDUC_CLEAN = 16 if EDUC99 == 15
replace EDUC_CLEAN = 18 if inlist(EDUC99, 16, 17, 18)

* The raw education variables are no longer needed once EDUC_CLEAN exists.
drop EDUC EDUC99

*Experience var
* EXP_LOSSYEAR = AGE, rolled back by the years elapsed since the job loss
* (YEAR - LOSSYEAR), minus (EDUC_CLEAN + 5). When LOSSYEAR is missing no
* roll-back is applied. Only filled in when AGE > 16.
gen EXP_LOSSYEAR = AGE - cond(LOSSYEAR == ., 0, YEAR - LOSSYEAR) - (EDUC_CLEAN + 5) if AGE > 16
*can I drop all obs without a loss year?


*Age var
* Age in the loss year; falls back to current AGE when LOSSYEAR is missing.
gen AGE_LOSSYEAR = AGE - cond(LOSSYEAR == ., 0, YEAR - LOSSYEAR)

* Keep the survey-date age under a new name; the name AGE is reused later.
rename AGE AGE_ORIGINAL

*Recode FT, UNION and DWHI
* All three share one scheme: 1 becomes 0 ("No"), 2 becomes 1 ("Yes"),
* and codes 3 through 99 become the extended missing value .a ("NA").
label define yesno 0 "No" 1 "Yes" .a "NA"
recode DWFULLTIME DWUNION DWHI (1 = 0) (2 = 1) (3/99 = .a)
label values DWFULLTIME DWUNION DWHI yesno

*Recode DWYEARS
* Codes 99-100 are turned into .a.
recode DWYEARS (99/100 = .a)

*Create PLANTCLOSING and LAYOFF dummies
* Both are 0/1 and never missing: a missing DWREAS yields 0 in each.
gen PLANTCLOSING = DWREAS == 1
gen LAYOFF = inlist(DWREAS, 2, 3)

*Create MARITALSTAT dummy
* 1 when MARST is 1 or 2, otherwise 0 (including when MARST is missing).
gen MARITAL = inlist(MARST, 1, 2)

*Create dummy for kids
* 1 if NCHILD is positive, 0 if it is zero, missing if NCHILD is missing.
* Stata ranks missing values above every number, so a bare "NCHILD > 0" test
* would flag records with unknown NCHILD as having children.
* (This variable is dropped and rebuilt later in the file, after NCHILD is
* merged back in.)
gen NCHILDBIN = (NCHILD > 0) if !missing(NCHILD)

*Placeholder
gen pt = 1

*************************
*CREATE TREATED VAR
*************************

*WARN Indicator (ONLY STATES THAT ADOPT AFTER FEDERAL, I.E., EXCLUDING HI AND WI)
* Three parallel lists: position i in each list describes the same state
* (state FIPS code, implementation year, and the value written to CALDAYS).
global state 6 10 17 33 34 36 19 50
global implement 2003 2019 2005 2010 2008 2009 2010 2015
global days 60 60 60 60 60 90 30 30 

* All three lists must have the same length, otherwise the loop below would
* pair a state with the wrong year or an empty CALDAYS value.
global n_models : list sizeof global(state)
global m_models : list sizeof global(implement)
local n_days : list sizeof global(days)
assert $n_models==$m_models
assert $n_models==`n_days'

* POSTTREAT     : 1 for displaced workers in a listed state who lost their job
*                 in or after that state's implementation year, else 0.
* CALDAYS       : that state's entry from the days list for POSTTREAT records, else 0.
* TREATYEAR     : implementation year for every record in a listed state.
* EVENTUALTREAT : 1 for every record in a listed state, regardless of timing.
gen POSTTREAT = 0
gen CALDAYS = 0
gen TREATYEAR = .
gen EVENTUALTREAT = 0
forval i=1/$n_models {
	local fips : word `i' of $state
	local adopt_year : word `i' of $implement
	local notice_days : word `i' of $days
	* Missing values sort above every number in Stata, so an unknown LOSSYEAR
	* would pass a bare ">=" test; exclude it explicitly.
	local is_post STATEFIP == `fips' & LOSSYEAR >= `adopt_year' & !missing(LOSSYEAR) & DWSTAT == 1
	replace POSTTREAT = 1 if `is_post'
	replace CALDAYS = `notice_days' if `is_post'
	replace TREATYEAR = `adopt_year' if STATEFIP == `fips'
	* Built from the same list as the variables above so the two cannot drift apart.
	replace EVENTUALTREAT = 1 if STATEFIP == `fips'
}

*************************
*CREATE NOTICE VARS
*************************

*Notice categorization: Binary Notice
* 1 for DWNOTICE codes 2-5, 0 for code 1, missing for any other value.
gen NOTICEBIN = inlist(DWNOTICE, 2, 3, 4, 5) if inlist(DWNOTICE, 1, 2, 3, 4, 5)

*Notice categorization: Short vs Lengthy Notice
* 1 for codes 3-4, 0 for codes 1-2, missing for any other value.
gen NOTICELEN = inlist(DWNOTICE, 3, 4) if inlist(DWNOTICE, 1, 2, 3, 4)

*Notice categorization: Ordered
* Copies DWNOTICE for codes 1-4; missing for any other value.
gen NOTICEORD = cond(inlist(DWNOTICE, 1, 2, 3, 4), DWNOTICE, .)

*************************
*CREATE ENFORCE. VARS
*************************

* Each enforcement flag is a 0/1 state-level dummy keyed on STATEFIP.

*One Third Waiver 
gen ONETHIRDWAIVER = inlist(STATEFIP, 19, 50, 6, 10)

*High Penalty
gen HIGHPENALTY = inlist(STATEFIP, 10, 33)

*High Liability
gen HIGHLIABILITY = inlist(STATEFIP, 19, 17, 6, 36, 10, 33) // 8,006,920 observations 


*****************************************************
*SAMPLE SELECTION Table 2 Panel A ** Begin Filtering
******************************************************
// sample period filter: job loss between 1993 and 2019 (records with missing LOSSYEAR are dropped here too)
keep if inrange(LOSSYEAR, 1993, 2019)

// filter out incomplete responses

keep if DWSTAT == 1                        // must be a displaced worker
keep if DWRESP == 2                        // must have been interviewed
// DWNOTICE reports whether the respondent received written advance notice of the job loss
// and how much time was given. 96-99 = refused, doesn't remember, no response, NIU.
keep if !inlist(DWNOTICE, 96, 97, 98, 99)
keep if inlist(DWFULLTIME, 0, 1)           // must be able to answer whether FT/PT
// Union status: 0, 1 and system missing are retained; the recoded .a ("NA") is not.
keep if inlist(DWUNION, 0, 1, .)

// filter out state laws that specifically don't apply to our setting

// For the non-IA treated states only layoffs / plant closings qualify (DWREAS 1-3).
drop if !inlist(DWREAS, 1, 2, 3) & inlist(STATEFIP, 6, 10, 17, 33, 34, 36, 50)
// For IA (FIPS 19) the reason can be plant closed, layoff, or seasonal (DWREAS 1-4).
drop if !inlist(DWREAS, 1, 2, 3, 4) & STATEFIP == 19
// Dropping recommending states HI, WI, CT, DC, FL, GA, KS, ME, MD, MA, MI, MN, ND, OH, TN.
// NOTE(review): FIPS 10 is in this drop list but is not named in the state list above,
// and it also appears among the treated states (global state) - confirm this is intended.
drop if inlist(STATEFIP, 15, 55, 9, 10, 11, 12, 13, 20, 23, 24, 25, 26, 27, 38, 39, 47)

//drop if ONETHIRDWAIVER == 0 & PLANTCLOSING == 0 & EVENTUALTREAT == 1

// filter out moves

// Only DWMOVE == 1 is retained. The original note read "moved since displacement";
// presumably code 1 means the worker did NOT move - verify against the codebook.
keep if DWMOVE == 1

// filter out inapplicable industries

// Must work in private for-profit or private non-profit to satisfy notice eligibility.
keep if inlist(DWCLASS, 2, 3)

// keep_industry = 1 for the DWIND1990 codes listed below, 0 otherwise.
// Excluded: military industries, unclassifiable or undefined industries, private
// households and niche personal services. Kept: major private-sector industries.
gen keep_industry = ///
      inlist(DWIND1990, 100, 101, 102, 110, 111, 112, 120, 121, 122) ///
    | inlist(DWIND1990, 142, 150, 151, 160, 161, 162, 171, 172) ///
    | inlist(DWIND1990, 180, 181, 182, 190, 191, 192, 200, 201) ///
    | inlist(DWIND1990, 210, 211, 212, 230, 231, 241, 242) ///
    | inlist(DWIND1990, 250, 251, 262, 270, 271, 272, 280, 281, 282) ///
    | inlist(DWIND1990, 290, 291, 300, 310, 311, 312, 320, 331, 332) ///
    | inlist(DWIND1990, 340, 341, 342, 350, 351, 352, 360, 370) ///
    | inlist(DWIND1990, 371, 372, 400, 401, 410, 411, 420, 421, 432) ///
    | inlist(DWIND1990, 440, 441, 442, 450, 451, 452, 470, 471, 472) ///
    | inlist(DWIND1990, 500, 501, 502, 510, 511, 512, 521, 530) ///
    | inlist(DWIND1990, 540, 541, 550, 562, 580, 591, 600, 601, 611) ///
    | inlist(DWIND1990, 612, 620, 621, 623, 631, 633, 641, 642, 651, 652, 682) ///
    | inlist(DWIND1990, 700, 702, 710, 711, 712, 721, 722, 731, 732) ///
    | inlist(DWIND1990, 740, 741, 742, 750, 751, 760, 762, 771, 772) ///
    | inlist(DWIND1990, 800, 810, 812, 820, 831, 832, 840, 841, 842) ///
    | inlist(DWIND1990, 850, 862, 870, 871, 882, 890, 891, 892)

drop if keep_industry != 1 // 19,488

// filter out occupations that appear less than 10 times in sample 
// (this first cd is immediately superseded by the cd to the cpsocc folder below)
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis/data/built"

*merge occupation info
* cps_occ.dta is merged twice: once keyed on OCC1990 and once keyed on DWOCC1990.
* Each pass brings in GROUP and renames it after the key it was matched on.
* CPS records without a match are kept; using-only records are discarded.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/data/cpsocc/"

merge m:1 OCC1990 using cps_occ.dta, keepusing(GROUP) keep(master match) nogenerate
rename GROUP OCC1990_GROUP

merge m:1 DWOCC1990 using cps_occ.dta, keepusing(GROUP) keep(master match) nogenerate
rename GROUP DWOCC1990_GROUP

*drop occupations that appear less than 10 times in sample
* occ_count = number of remaining records sharing the same DWOCC1990 code.
by DWOCC1990, sort: gen occ_count = _N
keep if occ_count >= 10

*****************
*End Filtering
*****************

**** Post-correction: Adding child variable to update to 2019***

* NCHILD was not saved in the original CPS 2023, so it is added back by merging on
* YEAR and CPSIDP. CPS records without a match are kept; using-only records are discarded.
* NOTE(review): merge without update/replace never overwrites a variable that is
* already in memory. An earlier section of this file already reads NCHILD, so
* confirm NCHILD really is absent from memory at this point.
* NOTE(review): 1:m lets a single CPS record match several rows of the using file,
* which would duplicate it - confirm the using file has one row per YEAR/CPSIDP.
merge 1:m YEAR CPSIDP using "C:\Users\mrabier\Dropbox\JOLE WARN Act\Analysis\data\cps2023\cps_2023_nchild.dta", ///
    keep(master match) nogenerate
drop NCHILDBIN

*Create dummy for kids
* 1 if NCHILD is positive, 0 if it is zero, missing if NCHILD is missing.
* Stata ranks missing above every number, so a bare "NCHILD > 0" test would code
* records with no NCHILD match as having children.
gen NCHILDBIN = (NCHILD > 0) if !missing(NCHILD)

drop if YEAR > 2020 // 18,309


*****************************************************

*************************
*CREATE OUTCOME VARS
*************************
* Switch to the short names used in the analysis. The lists are positional:
* the k-th old name becomes the k-th new name.
rename (EDUC_CLEAN DWHI DWFULLTIME DWUNION AGE_LOSSYEAR EXP_LOSSYEAR ///
        GDPPERCAPITA_LOSSYEAR UNEMPRATE_LOSSYEAR DWYEARS tGDP_LOSSYEAR) ///
       (EDUC       HI   FULLTIME   UNION   AGE          EXP ///
        GDPPERCAPITA          UNEMPRATE          YEARS   GDP)

*Code to missing when appropriate
* Variables that share the same special-code range are recoded together;
* every listed range becomes the extended missing value .a.
recode DWWEEKL DWWEEKC   (9999/10000 = .a)
recode DWWAGEL DWWAGEC   (99/100 = .a)
recode DWWKSUN AHRSWORK1 (996/999 = .a)
recode DWBEN             (96/97 = .a)
recode DWEXBEN           (96/99 = .a)
recode DWJOBSINCE        (95/98 = .a)

*Indicator for securing any job
* 0 when DWJOBSINCE is 0, 1 when it lies in 1-95, missing otherwise.
gen ANYJOB = 0 if DWJOBSINCE == 0
replace ANYJOB = 1 if inrange(DWJOBSINCE,1,95)

*Joblessness period in between jobs
* DWWKSUN codes 996-999 were recoded to .a earlier in this file. Stata ranks
* missing values above every number, so a bare "DWWKSUN > 0" test would mark
* those unknown durations as jobless. Both indicators are therefore left
* missing when DWWKSUN is missing.
gen JOBLESS = (DWWKSUN > 0) if !missing(DWWKSUN)

gen JOBLESSGT1WK = (DWWKSUN > 1) if !missing(DWWKSUN)

*long term employed
* 1 when EMPSTAT is 10 or 12, otherwise 0 (including when EMPSTAT is missing).
gen CUREMP = inlist(EMPSTAT, 10, 12)

*status downgrade --NEW - did prior fulltime workers find fulltime work after?

* Defined only for workers who were full-time at the lost job (FULLTIME == 1) and
* are currently employed (EMPSTAT 10 or 12); missing for everyone else.
* Within that group: 1 if AHRSWORK1 is below 35, otherwise 0.
* NOTE(review): a missing AHRSWORK1 is not "< 35", so such records are coded 0
* rather than missing - confirm that is the intended treatment.
g FTtoPT = (AHRSWORK1 < 35) if FULLTIME == 1 & inlist(EMPSTAT, 10, 12)


* Annualised earnings at the lost job: weekly amount times 52.
* The "< 9999" guard also excludes missing values, which sort above every number.
gen DWWEEKL_YR = 52 * DWWEEKL if DWWEEKL < 9999 

* 1 when SEX is 2, otherwise 0.
gen FEMALE = SEX == 2

*log earnings variables
* log(1 + x); a missing input simply yields a missing result, so no explicit
* missing-value guard is required.
gen DWWEEKLLOG    = ln(1 + DWWEEKL)
gen DWWEEKCLOG    = ln(1 + DWWEEKC)
gen DWWEEKLYR_LOG = ln(1 + DWWEEKL_YR)

* Display labels for the variables in the built sample.

* Worker characteristics
label var FULLTIME     "Full-time"
label var UNION        "Union Member"
label var YEARS        "Years at Employer"
label var HI           "Health Insurance"
label var MARITAL      "Married"
label var EDUC         "Education"
label var AGE          "Age"
label var NCHILD       "No. Children"
label var EXP          "Work Experience"

* State-level controls
label var GDP          "State GDP"
label var UNEMPRATE    "State Unemp. Rate"

* Notice, displacement reason and treatment
label var NOTICEBIN    "Binary Notice"
label var NOTICELEN    "Lengthy Notice"
label var NOTICEORD    "Ordinal Notice"
label var PLANTCLOSING "Estab. Closing"
label var POSTTREAT    "Treat"

* Outcomes
label var JOBLESS      "Jobless"
label var JOBLESSGT1WK "Jobless > 1 wk"
label var DWWKSUN      "Weeks Unemployed"
label var CUREMP       "Currently Employed"
label var DWWEEKLLOG   "Logged Weekly Earnings at Lost Job"
label var DWWEEKCLOG   "Logged Weekly Earnings at Current Job"
label var FTtoPT       "Full-time to Part-time"

* Write the final analysis sample to the built-data folder.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/data/built"
save "cps_built_final_sample.dta", replace

*************************************
* SUMMARY STATS BY WORKER TYPE - TABLE 2 PANEL B
*************************************

cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/output/"

* This drop is applied to the data in memory and therefore affects all three groups below.
drop if ONETHIRDWAIVER == 0 & PLANTCLOSING == 0 & EVENTUALTREAT == 1

* Statistics computed for every group: a count of non-missing EDUC, means of the
* listed variables, and the median of annualised earnings at the lost job.
local panel_b_stats (count) N=EDUC ///
    (mean) FULLTIME UNION YEARS HI MARITAL AGE DWWEEKL NCHILDBIN JOBLESS JOBLESSGT1WK ///
           DWWKSUN CUREMP FTtoPT DWWEEKC NOTICEBIN NOTICELEN NOTICEORD ///
    (median) MEDEARN = DWWEEKL_YR

* Group 1: control displaced workers
local cond1 POSTTREAT == 0
local name1 DW Control
local file1 sample_dwcont.dta

* Group 2: treated displaced workers
local cond2 POSTTREAT == 1
local name2 DW Treat
local file2 sample_dwtreat.dta

* Group 3: treated displaced workers with notice
local cond3 POSTTREAT == 1 & NOTICEBIN == 1
local name3 DW Treat Rcvd Notice
local file3 sample_dwtreatnot.dta

* For each group: restrict to it, collapse to one row of statistics, tag the row,
* save it, then restore the sample for the next group.
forvalues g = 1/3 {
	preserve
	keep if `cond`g''
	collapse `panel_b_stats'
	gen Group = "`name`g''"
	order Group
	save `file`g'', replace
	restore
}


*************************************
* EXPORT TO LATEX
*************************************
* Stack the three one-row group summaries, transpose them so that each statistic
* is a row and each group a column, then export. Files are read from and written
* to the current working directory. The sample in memory is restored afterwards.
preserve
use sample_dwcont.dta, clear
append using sample_dwtreat.dta sample_dwtreatnot.dta

* Group is a string and cannot be transposed; column order identifies the groups.
drop Group
xpose, clear varname

* After xpose, v1-v3 follow the append order above.
rename (v1 v2 v3) (DW_Control DW_Treat DW_Treat_Rcvd_Notice)
rename _varname variable

* write LaTeX to samplecomp.tex in the current working directory
* (the trailing line continuation was removed: it only worked because a
* whitespace-only line followed it, and would otherwise have absorbed the next line)
dataout, save("samplecomp.tex") tex replace

* optional Excel too
export excel using "sample_summary.xlsx", firstrow(variables) sheet("SummaryStats") replace
restore
*End


**************************************
*Top Occupations and Industry
**************************************
* Top occupations
* Rank DWOCC1990 codes by how often they occur in the final sample (after the
* same exclusion used for the summary table) and export the 22 most frequent.
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/data/built"
use "cps_built_final_sample.dta", clear
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/output/"
drop if ONETHIRDWAIVER == 0 & PLANTCLOSING == 0 & EVENTUALTREAT == 1

* One row per occupation, carrying its frequency.
bysort DWOCC1990: gen Freq = _N
by DWOCC1990: keep if _n == 1
keep DWOCC1990 Freq

* Sort by descending frequency, breaking ties by occupation code. Without the
* tie-breaker Stata orders equal frequencies randomly, so the cutoff and the
* ranks could change from run to run.
gsort -Freq DWOCC1990
keep if _n <= 22
gen Rank = _n

* Replace the numeric code with its value-label text.
decode DWOCC1990, gen(Occupation)
drop DWOCC1990
order Rank Occupation Freq
export excel using "topocc.xlsx", firstrow(variables) replace


* Top industries
* Rank DWIND1990 codes by how often they occur in the final sample (after the
* same exclusion used for the summary table) and export the 21 most frequent.

cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/data/built"
use "cps_built_final_sample.dta", clear
cd "C:\Users\mrabier\Dropbox\JOLE WARN Act\analysis/output/"
drop if ONETHIRDWAIVER == 0 & PLANTCLOSING == 0 & EVENTUALTREAT == 1

* One row per industry, carrying its frequency.
bysort DWIND1990: gen Freq = _N
by DWIND1990: keep if _n == 1
keep DWIND1990 Freq

* Sort by descending frequency, breaking ties by industry code. Without the
* tie-breaker Stata orders equal frequencies randomly, so the cutoff and the
* ranks could change from run to run.
gsort -Freq DWIND1990
keep if _n <= 21
gen Rank = _n

* Replace the numeric code with its value-label text.
decode DWIND1990, gen(Industry)
drop DWIND1990
order Rank Industry Freq
export excel using "topind.xlsx", firstrow(variables) replace

